library(tidyverse)
library(countrycode)
library(lubridate)
library(RColorBrewer)
library(plotly)
# Load the incident records and add two helper columns:
#   country   - copy of `country of incident` under a syntactic name
#   continent - continent looked up from the country name via countrycode()
# NOTE(review): per the recorded countrycode warning, "Hrvatska", "unknown"
# and "Wales" get no continent, so those rows drop out of every later
# `continent == "Europe"` filter - confirm that this is intended.
df <- read_csv(file = "refugeesAndMigrants.csv")
df[["country"]] <- df[["country of incident"]]
df[["continent"]] <- countrycode(
  sourcevar = df[["country"]],
  origin = "country.name",
  destination = "continent"
)
## Warning in countrycode(sourcevar = df[["country"]], origin = "country.name", : Some values were not matched unambiguously: Hrvatska, unknown, Wales
# Grand total of `number` over all incidents; missing counts are ignored.
summarise(df, sum_number = sum(number, na.rm = TRUE))
## # A tibble: 1 x 1
##   sum_number
##        <dbl>
## 1      34361
# European incidents only: deaths summed per country and raw `found dead`
# value (exposed as `date`). The result is ungrouped.
europe <- df %>%
  filter(continent == "Europe") %>%
  group_by(country, date = `found dead`) %>%
  summarise(sum_number = sum(number, na.rm = TRUE), .groups = "drop")
# Rows whose date parses as day-month-year: keep them and extract the year.
d0 <- europe %>%
  mutate(
    clean_date = dmy(date),
    clean_year = year(clean_date)
  ) %>%
  drop_na(clean_date) %>%
  select(country, sum_number, clean_year)
## Warning: Problem with `mutate()` input `clean_date`.
## ℹ  159 failed to parse.
## ℹ Input `clean_date` is `dmy(date)`.
# Rows whose date does NOT parse as day-month-year. `check` holds the first
# number found in the raw date string and is used afterwards to recover a year.
d1 <- europe %>%
  mutate(clean_date = dmy(date)) %>%
  filter(is.na(clean_date))
d1$check <- parse_number(d1$date)
## Warning: Problem with `mutate()` input `clean_date`.
## ℹ  159 failed to parse.
## ℹ Input `clean_date` is `dmy(date)`.
# Recover a four-digit year from `check` for the rows lubridate could not
# parse:
#   check > 1000        -> already a full year
#   18 < check <= 1000  -> two-digit year in the 1900s (e.g. 93 -> 1993)
#   check <= 18         -> two-digit year in the 2000s (e.g. 5 -> 2005)
# Vectorised with case_when() so it refers to `check` by name rather than by
# column position, works on zero rows, and yields NA (instead of an error)
# when no number could be extracted from the date string.
d1 <- d1 %>%
  mutate(
    clean_year = case_when(
      check > 1000 ~ check,
      check > 18 ~ check + 1900,
      check <= 18 ~ check + 2000
    )
  ) %>%
  select(country, sum_number, clean_year)
# Stack the parsed (d0) and repaired (d1) rows, ordered by country and year.
# europe_1 stays grouped by country and year.
europe_1 <- bind_rows(d0, d1) %>%
  arrange(country, clean_year) %>%
  group_by(country, clean_year)

# One row per year and country holding the total number of deaths.
europe_2 <- europe_1 %>%
  group_by(clean_year, country) %>%
  summarise(sum_max_amount = sum(sum_number), .groups = "drop") %>%
  arrange(clean_year, country)
# Names of the 28 EU member states used to pad the data: the full join below
# adds a row for every listed country that has no matching `country` value
# in europe_2.
# NOTE(review): names are matched literally against `country`; spellings such
# as "Czech Rep." only line up if the source data uses the same spelling.
europeanUnion <- c(
  "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Rep.",
  "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary",
  "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta",
  "Netherlands", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia",
  "Spain", "Sweden", "United Kingdom"
)

eu <- data.frame(europeanUnion = europeanUnion)
europe_2 <- europe_2 %>%
  full_join(eu, by = c("country" = "europeanUnion"))
# Build a country x year panel covering 1993-2018 and accumulate deaths per
# country over time.
europe_3 <- europe_2 %>%
  group_by(country) %>%
  complete(clean_year = 1993:2018) %>%
  select(country, Year = clean_year, sum_max_amount) %>%
  # Sort explicitly by year within country so the running total below does not
  # depend on the row order complete() happens to return. Rows with a missing
  # year sort last and therefore never feed into a real year's total.
  arrange(country, Year) %>%
  # Years without records count as 0 deaths; rows without a year get the
  # placeholder Year 0, which is filtered out before plotting.
  replace_na(list(Year = 0, sum_max_amount = 0)) %>%
  mutate(Cumulative_Deaths = cumsum(sum_max_amount)) %>%
  ungroup()
# Attach the ISO3 country code used as the map location key.
europe_3[["code"]] <- countrycode(
  sourcevar = europe_3[["country"]],
  origin = "country.name",
  destination = "iso3c"
)
# Add the hover label and drop the placeholder rows (Year 0).
europe_3 <- europe_3 %>%
  mutate(hover = paste0(country, "\n", Cumulative_Deaths, " Deaths")) %>%
  filter(Year != 0)
# Persist the panel, then reload it from disk.
europe_3 %>%
  write_csv("europe_3.csv")
europe_3 <- read_csv(file = "europe_3.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   country = col_character(),
##   Year = col_double(),
##   sum_max_amount = col_double(),
##   Cumulative_Deaths = col_double(),
##   code = col_character(),
##   hover = col_character()
## )
library(plotly)

# Base geo settings for the map (Europe scope).
g <- list(scope = 'europe')

# Choropleth of cumulative deaths per country with one animation frame per
# year. The colour range runs from 0 to the largest cumulative total.
graph <- europe_3 %>%
  plot_geo(frame = ~Year) %>%
  add_trace(
    locations = ~code,
    z = ~Cumulative_Deaths,
    color = ~Cumulative_Deaths,
    colorscale = 'Purples',
    zmin = 0,
    zmax = max(europe_3[["Cumulative_Deaths"]]),
    text = ~hover,
    hoverinfo = 'text'
  ) %>%
  colorbar(title = "") %>%
  layout(
    title = list(
      text = "\nTotal Cumulative Migrant Deaths in Europe\n(1993-2018)",
      x = 0.01
    ),
    margin = list(b = 50, l = 50),
    # Europe scope plus an explicit latitude/longitude window.
    geo = c(
      g,
      list(
        lataxis = list(range = c(30, 80)),
        lonaxis = list(range = c(-10, 40))
      )
    )
  ) %>%
  config(displayModeBar = FALSE)
graph
# not working not used
#start_date <- as.Date('1993/1/1')
#date_range <- seq(start_date, by = 'year', length.out = 26)
#date_range <- year(date_range)
#date_range <- as.data.frame(date_range)
#europe_3 <- full_join(europe_2, date_range, by = c("clean_year"="date_range"))
## not working
#europe_1 %>%
#  group_by(country) %>%
#  mutate(cum_death = sum_number + lag(sum_number, default=first(sum_number)))

# Second graph: cause of death by country ----

# European incidents with a lower-cased cause-of-death description, used for
# keyword matching below.
text <- df %>%
  filter(continent == "Europe") %>%
  select(
    date = `found dead`,
    deaths = number,
    country,
    continent,
    description = `cause of death`
  ) %>%
  mutate(description = tolower(description))
# Keyword lists per cause of death. Each list is later collapsed with "|"
# into one regular expression and matched against the lower-cased description.
sea <- c("boat", "drown", "drowning", "sea", "ocean")
vehicle <- c(
  "stowaway", "car", "vehicle", "plane", "airplane", "train", "truck", "van"
)
fire <- c("arson", "fire", "gas", "gasoline")
suicide <- c(
  "suicide", "hanged", "hang", "jump", "drugs", "drug", "hungerstrike",
  "suffocate", "suffocated"
)
murder <- c("fight", "murder", "murdered")

# Suicide and murder keywords combined, in that order.
suicide_muder <- c(suicide, murder)
# One flag column per cause: the cause label when any of its keywords occurs
# in the description, "Other" otherwise. A row can be flagged for several
# causes at once.
# Fix: the `murder` flag used to be built from `suicide`, which at that point
# resolved to the freshly created `suicide` column rather than the keyword
# vector, so it never matched. It now uses the `murder` keywords.
# Each keyword vector is read before the column of the same name is created
# in this mutate(), so the names still refer to the keyword vectors.
text_1 <- text %>%
  mutate(
    sea = if_else(
      grepl(paste(sea, collapse = "|"), description), "Sea", "Other"
    ),
    vehicle = if_else(
      grepl(paste(vehicle, collapse = "|"), description), "Vehicle", "Other"
    ),
    fire = if_else(
      grepl(paste(fire, collapse = "|"), description), "Fire", "Other"
    ),
    suicide = if_else(
      grepl(paste(suicide, collapse = "|"), description), "Suicide", "Other"
    ),
    murder = if_else(
      grepl(paste(murder, collapse = "|"), description), "Murder", "Other"
    )
  )
## Stopped Here have to multiply deaths by number of occurrences for cause of death
# Assign exactly one cause per incident (first matching keyword list wins, in
# the order Sea, Vehicle, Fire, Suicide, Murder, else Other), then count how
# many incidents share each cause / country / death-count combination.
# The result stays grouped by cause and country.
text_2 <- text %>%
  mutate(
    cause = case_when(
      grepl(pattern = paste(sea, collapse = "|"), x = description) ~ "Sea",
      grepl(pattern = paste(vehicle, collapse = "|"), x = description) ~ "Vehicle",
      grepl(pattern = paste(fire, collapse = "|"), x = description) ~ "Fire",
      grepl(pattern = paste(suicide, collapse = "|"), x = description) ~ "Suicide",
      grepl(pattern = paste(murder, collapse = "|"), x = description) ~ "Murder",
      TRUE ~ "Other"
    )
  ) %>%
  group_by(cause, country, deaths) %>%
  summarise(n = n(), .groups = "drop_last")
# Deaths per row = deaths per incident times number of such incidents.
graph2 <- mutate(text_2, cumulative_deaths = n * deaths)
# workshopping graph
# Draft bar chart: deaths by cause for four countries, one panel per country.
# Only the causes Vehicle, Other and Sea are kept on the y axis.
graph2 %>%
  filter(country %in% c("Spain", "Italy", "Greece", "France")) %>%
  group_by(country, cause) %>%
  arrange(desc(cumulative_deaths)) %>%
  ggplot(
    aes(
      x = cumulative_deaths,
      y = reorder(cause, cumulative_deaths),
      label = cumulative_deaths
    )
  ) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~country, ncol = 2, scales = "free") +
  labs(
    title = "Cause of Death by Country",
    x = "Frequency",
    y = "Cause of Death"
  ) +
  theme(
    plot.title = element_text(vjust = 2, hjust = 0.5),
    legend.position = "none"
  ) +
  ylim("Vehicle", "Other", "Sea")
## Warning: Removed 19 rows containing missing values (position_stack).

# workshopping graph in percent  - stopped here
# Draft chart: proportion of rows per cause for four countries, one panel per
# country, with the x axis formatted as a percentage and limited to 0-100%.
# Uses after_stat(prop) in place of the deprecated `..prop..` notation.
check <- graph2 %>%
  filter(country %in% c('Spain', 'Italy', 'Greece', 'France')) %>%
  group_by(country, cause) %>%
  arrange(desc(cumulative_deaths)) %>%
  # group = 1 makes the proportion relative to all rows in a panel rather
  # than to each cause on its own.
  ggplot(aes(x = after_stat(prop), y = cause, group = 1)) +
  geom_bar(show.legend = FALSE, stat = 'count') +
  facet_wrap(~country, ncol = 2, scales = "free") +
  labs(x = "Percentage",
       y = 'Cause of Death') +
  ggtitle("Cause of Death by Country") +
  theme(plot.title = element_text(vjust = 2, hjust = 0.5),
        legend.position = 'none') +
  ylim('Suicide', 'Murder', 'Fire', 'Vehicle', 'Other', 'Sea') +
  scale_x_continuous(labels = scales::percent_format()) +
  coord_cartesian(xlim = c(0, 1))

check

# Share of each row in its country's total deaths (in percent), largest
# first; the printed result is grouped by cause and country.
graph2 %>%
  filter(country %in% c("Spain", "Italy", "Greece", "France")) %>%
  group_by(country) %>%
  mutate(percent = cumulative_deaths / sum(cumulative_deaths) * 100) %>%
  arrange(desc(percent)) %>%
  group_by(cause, country)
## # A tibble: 223 x 6
## # Groups:   cause, country [23]
##    cause   country deaths     n cumulative_deaths percent
##    <chr>   <chr>    <dbl> <int>             <dbl>   <dbl>
##  1 Sea     Greece     360     1               360   19.1 
##  2 Vehicle France       1    81                81   10.4 
##  3 Sea     Italy      268     1               268    8.27
##  4 Sea     Italy      250     1               250    7.71
##  5 Sea     Spain        1   242               242    7.58
##  6 Sea     France      19     3                57    7.32
##  7 Sea     France      26     2                52    6.68
##  8 Sea     Italy      100     2               200    6.17
##  9 Sea     France      23     2                46    5.91
## 10 Sea     Italy      177     1               177    5.46
## # … with 213 more rows
# Deaths per country and cause for the four countries, plus each cause's
# share of its country's total (percent, rounded to two decimals).
# The result remains grouped by country.
graph3 <- graph2 %>%
  filter(country %in% c("Spain", "Italy", "Greece", "France")) %>%
  group_by(country, cause) %>%
  summarise(sum_deaths = sum(cumulative_deaths)) %>%
  mutate(percent = round(sum_deaths / sum(sum_deaths) * 100, digits = 2))
## `summarise()` has grouped output by 'country'. You can override using the `.groups` argument.
graph3
## # A tibble: 23 x 4
## # Groups:   country [4]
##    country cause   sum_deaths percent
##    <chr>   <chr>        <dbl>   <dbl>
##  1 France  Fire            29    3.72
##  2 France  Murder           5    0.64
##  3 France  Other           40    5.13
##  4 France  Sea            609   78.2 
##  5 France  Suicide         11    1.41
##  6 France  Vehicle         85   10.9 
##  7 Greece  Fire             4    0.21
##  8 Greece  Murder           1    0.05
##  9 Greece  Other          202   10.7 
## 10 Greece  Sea           1595   84.8 
## # … with 13 more rows
# Save the summary table, then turn `cause` into an ordered factor with a
# fixed level order for plotting.
graph3 %>%
  write_csv("graph3.csv")
graph3[["cause"]] <- ordered(
  graph3[["cause"]],
  levels = c("Sea", "Other", "Vehicle", "Fire", "Murder", "Suicide")
)

# Fill colours, applied in the level order of `cause`.
my_colors <- c(
  "blue1", "#4DAF4A", "#FF7F00",
  "#E41A1C", "#FFFF33", "#984EA3"
)

# Final chart: percentage of deaths by cause, one panel per country, with the
# percentage printed next to each bar.
barplot <- ggplot(graph3, aes(x = percent, y = cause, fill = cause)) +
  geom_col(show.legend = FALSE, col = "black") +
  # Labels reuse the x / y mapping of the bars.
  geom_text(
    aes(label = paste0(percent, "%")),
    hjust = -0.5,
    size = 3.5,
    position = position_dodge(width = 1)
  ) +
  facet_wrap(~country, ncol = 2) +
  scale_fill_manual(values = my_colors) +
  ylim("Suicide", "Murder", "Fire", "Vehicle", "Other", "Sea") +
  xlim(0, 100) +
  labs(
    title = "Cause of Death by Country\n(1993-2018)",
    x = "Percentage",
    y = "Cause of Death"
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(colour = "black", fill = NA),
    strip.background = element_blank(),
    strip.text = element_text(size = 15),
    plot.title = element_text(
      family = "sans", hjust = 0.5, size = 18, margin = margin(0, 0, 10, 0)
    )
  )

barplot

## checking summary statistics

# Sanity check: total sea-related deaths recorded for France.
graph2 %>%
  filter(country == "France", cause == "Sea") %>%
  summarise(d = sum(cumulative_deaths))
## `summarise()` has grouped output by 'cause'. You can override using the `.groups` argument.
## # A tibble: 1 x 3
## # Groups:   cause [1]
##   cause country     d
##   <chr> <chr>   <dbl>
## 1 Sea   France    609
# Sanity check: total deaths in the map data.
summarise(europe_3, sum = sum(sum_max_amount))
## # A tibble: 1 x 1
##     sum
##   <dbl>
## 1 10041
# Sanity check: total deaths in the cause-of-death data.
summarise(text, sum = sum(deaths))
## # A tibble: 1 x 1
##     sum
##   <dbl>
## 1 10041
# Sanity check: total deaths recorded for Spain in the raw data.
df %>%
  filter(country %in% "Spain") %>%
  summarise(deaths = sum(number))
## # A tibble: 1 x 1
##   deaths
##    <dbl>
## 1   3191